In [1]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from plotly.graph_objs import *
from scipy import stats
import scipy as sp
In [2]:
ECG_Features = pd.read_csv("../Data/DREAMER_Extracted_ECG.csv")
del ECG_Features['Unnamed: 0']
# Number of features : Nf = 30
Nf=ECG_Features.shape[1]-1
ECG_Features.head()
Out[2]:
ECG_Rate_Mean HRV_RMSSD HRV_MeanNN HRV_SDNN HRV_SDSD HRV_CVNN HRV_CVSD HRV_MedianNN HRV_MadNN HRV_MCVNN ... HRV_LFn HRV_HFn HRV_LnHF HRV_SD1 HRV_SD2 HRV_SD2SD1 HRV_CSI HRV_CVI HRV_CSI_Modified HRV_SampEn
0 1.019883 0.937463 0.980715 0.942895 0.933189 0.961436 0.955897 0.981730 1.000000 1.018617 ... NaN NaN NaN 0.933189 0.933189 1.0 1.0 0.978786 0.933189 0.858731
1 0.901621 1.646394 1.108991 1.212197 1.641699 1.093063 1.484588 1.113345 1.104762 0.992243 ... NaN NaN NaN 1.641699 1.641699 1.0 1.0 1.172813 1.641699 1.351139
2 1.033613 0.973727 0.967888 1.094067 0.968126 1.130365 1.006033 0.980392 1.111111 1.133333 ... NaN NaN NaN 0.968126 0.968126 1.0 1.0 0.990580 0.968126 0.853926
3 0.951886 1.235442 1.049724 1.190338 1.236088 1.133953 1.176921 1.056122 1.142857 1.082126 ... NaN NaN NaN 1.236088 1.236088 1.0 1.0 1.067677 1.236088 0.804541
4 1.063608 0.747780 0.942383 0.685394 0.745794 0.727298 0.793499 0.933333 0.786765 0.842962 ... NaN NaN NaN 0.745794 0.745794 1.0 1.0 0.916515 0.745794 1.301240

5 rows × 30 columns

In [3]:
from ipywidgets import widgets
import plotly.express as px

ECG_Feature_Widget = widgets.Dropdown(
    options=list(ECG_Features.columns[0:Nf]),
    value='ECG_Rate_Mean',
    description='Feature :',
)

data=ECG_Features['ECG_Rate_Mean']
graphic = go.Histogram(x=data, opacity=0.75, name='Some data')
g = go.FigureWidget(data=[graphic],
                    layout=go.Layout(
                        title=dict(
                            text='Interactive visualization of the ECG Features (Histogram)'
                        ),
                        barmode='overlay',
                        xaxis=dict(title = ''), yaxis=dict(title = 'Counts[#]')
                    ))

def validate():
    if ECG_Feature_Widget.value in ECG_Features[1:Nf].unique():
        return True
    else:
        return False


def response(change):
            g.data[0].x = ECG_Features[ECG_Feature_Widget.value]
            g.layout.barmode = 'overlay'
            g.layout.xaxis.title = ''
            g.layout.yaxis.title = 'Counts[#]'


ECG_Feature_Widget.observe(response, names="value")

container = widgets.HBox([ECG_Feature_Widget])
widgets.VBox([container,g])
In [4]:
# Heatmap

ECG_Feature_Widget2 = widgets.Dropdown(
    options=list(ECG_Features.columns[0:Nf]),
    value='ECG_Rate_Mean',
    description='Feature :',
)
graphic2 = go.Heatmap(z=data.values.reshape(23,18),
                   y=[str('s'+str(k+1)) for k in range(0,23)],
                   x=[str('v'+str(j+1)) for j in range(0,18)])
g2 = go.FigureWidget(data=[graphic2],
                    layout=go.Layout(
                        title=dict(
                            text='Interactive visualization of the ECG Features (Heatmap)'
                        ),
                        barmode='overlay',
                        xaxis=dict(title = 'Video number'), yaxis=dict(title = 'Subject number')
                    ))

def validate2():
    if ECG_Feature_Widget2.value in ECG_Features[1:Nf].unique():
        return True
    else:
        return False


def response2(change):
            g2.data[0].z = ECG_Features[ECG_Feature_Widget2.value].values.reshape(23,18)
            g2.layout.barmode = 'overlay'
            #g2.layout.xaxis.title = ''
            #g2.layout.yaxis.title = 'Counts[#]'


ECG_Feature_Widget2.observe(response2, names="value")

container2 = widgets.HBox([ECG_Feature_Widget2])
widgets.VBox([container2,g2])
In [5]:
All_Features = pd.read_csv("../Data/All_Features_with_Age_Gender.csv")
del All_Features['Unnamed: 0']
Nf=All_Features.shape[1]-1
All_Features.head()

# Heatmap

All_Feature_Widget = widgets.Dropdown(
    options=list(All_Features.columns[0:Nf]),
    value='psdtheta_1',
    description='Feature :',
)

#graphic = go.Histogram(x=All_Features['All_Rate_Mean'], opacity=0.75, name='Some data')
data=All_Features['psdtheta_1']
graphic = go.Histogram(x=data, opacity=0.75, name='Some data')
g = go.FigureWidget(data=[graphic],
                    layout=go.Layout(
                        title=dict(
                            text='Interactive visualization of the DREAMER_Preprocessed_NotTransformed_NotThresholded.csv (Histogram)'
                        ),
                        barmode='overlay',
                        xaxis=dict(title = ''), yaxis=dict(title = 'Counts[#]')
                    ))

def validate():
    if All_Feature_Widget.value in All_Features[1:Nf].unique():
        return True
    else:
        return False


def response(change):
            g.data[0].x = All_Features[All_Feature_Widget.value]
            g.layout.barmode = 'overlay'
            g.layout.xaxis.title = ''
            g.layout.yaxis.title = 'Counts[#]'


All_Feature_Widget.observe(response, names="value")

container = widgets.HBox([All_Feature_Widget])
widgets.VBox([container,g])
In [6]:
# Heatmap

All_Feature_Widget2 = widgets.Dropdown(
    options=list(All_Features.columns[0:Nf]),
    value='psdtheta_1',
    description='Feature :',
)
graphic2 = go.Heatmap(z=data.values.reshape(23,18),
                   y=[str('s'+str(k+1)) for k in range(0,23)],
                   x=[str('v'+str(j+1)) for j in range(0,18)])
g2 = go.FigureWidget(data=[graphic2],
                    layout=go.Layout(
                        title=dict(
                            text='Interactive visualization of the EEG features (Heatmap)'
                        ),
                        barmode='overlay',
                        xaxis=dict(title = 'Video number'), yaxis=dict(title = 'Subject number')
                    ))

def validate2():
    if All_Feature_Widget2.value in All_Features[1:Nf].unique():
        return True
    else:
        return False


def response2(change):
            g2.data[0].z = All_Features[All_Feature_Widget2.value].values.reshape(23,18)
            g2.layout.barmode = 'overlay'
            #g2.layout.xaxis.title = ''
            #g2.layout.yaxis.title = 'Counts[#]'


All_Feature_Widget2.observe(response2, names="value")

container2 = widgets.HBox([All_Feature_Widget2])
widgets.VBox([container2,g2])
In [7]:
Valence=All_Features['Valence'].values
Arousal=All_Features['Arousal'].values
Pearson_R=np.empty((2,Nf-4))
p_values=np.empty((2,Nf-4))
for f in range(0,Nf-4):
    if not(All_Features.iloc[:,f].dtype==np.object):
        Pearson_R[0,f], p_values[0,f] = sp.stats.pearsonr(Valence,All_Features.iloc[:,f].values)
        Pearson_R[1,f], p_values[1,f] = sp.stats.pearsonr(Arousal,All_Features.iloc[:,f].values)

heatmap_Pearson_R = px.imshow(Pearson_R,
                y=['Valence','Arousal'],
                x=All_Features.columns[0:Nf-4]
                )
heatmap_Pearson_R.show()
In [8]:
heatmap_p_values = px.imshow(p_values,
                y=['Valence','Arousal'],
                x=All_Features.columns[0:Nf-4]#[str('f'+str(j+1)) for j in range(0,Nf-4)]
                )

heatmap_p_values.show()
In [9]:
corrMatrix = All_Features.corr()

heatmap_corrMatrix = px.imshow(corrMatrix,
                               title='Cross-correlation matrix (DREAMER_Preprocessed_NotTransformed_NotThresholded.csv)',
                               
                y=All_Features.columns[0:Nf],
                x=All_Features.columns[0:Nf]#[str('f'+str(j+1)) for j in range(0,Nf-4)]
                )

heatmap_corrMatrix.show()
In [10]:
# Extracting the last 4 secondes of the ECG data (baseline)
fs_ECG = 256 #Hz : sampling rate
import math
N_ECG = math.ceil(fs_ECG*4) # Select a number of points corresponding to 4s

import scipy.io as sio
path=u'../Data/DREAMER.mat'
data=sio.loadmat(path)

for k in range(0,23):
    for j in range(0,18):
        basl_l=data['DREAMER'][0,0]['Data'][0,k]['ECG'][0,0]['baseline'][0,0][j,0][-1-N_ECG:-1,0]
        basl_r=data['DREAMER'][0,0]['Data'][0,k]['ECG'][0,0]['baseline'][0,0][j,0][-1-N_ECG:-1,1]
        basl_avg=(basl_l+basl_r)/2
        # Now we should do something with this 4s extract : TODO
In [11]:
# Visualization of the last 4s of an ECG signal

print(len(basl_avg))
import seaborn as sns
sns.lineplot(data=basl_avg)

# We could not extract the last 4s because Neurokit2 finds it's too short, it needs at least 40s of ECG data
1024
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9a5e344650>
In [12]:
# Extracting the last 4 seconds of the EEG data (baseline)

import scipy.io as sio
from scipy import signal
import numpy as np
import pandas as pd
from sklearn import preprocessing as pre
import math

fs_EEG = 128 #Hz : sampling rate
N_EEG = math.ceil(fs_EEG*4) # Select a number of points corresponding to 4s

def preprocessing(input,feature):
    overall=signal.firwin(9,[0.0625,0.46875],window='hamming')
    theta=signal.firwin(9,[0.0625,0.125],window='hamming')
    alpha=signal.firwin(9,[0.125,0.203125],window='hamming')
    beta=signal.firwin(9,[0.203125,0.46875],window='hamming')
    filtedData=signal.filtfilt(overall,1,input)
    filtedtheta=signal.filtfilt(theta,1,filtedData)
    filtedalpha=signal.filtfilt(alpha,1,filtedData)
    filtedbeta=signal.filtfilt(beta,1,filtedData)
    ftheta,psdtheta=signal.welch(filtedtheta,nperseg=256)
    falpha,psdalpha=signal.welch(filtedalpha,nperseg=256)
    fbeta,psdbeta=signal.welch(filtedbeta,nperseg=256)
    feature.append(max(psdtheta))
    feature.append(max(psdalpha))
    feature.append(max(psdbeta))
    return feature


if __name__ == '__main__':
    total=0
    path=u'../Data/DREAMER.mat'
    data=sio.loadmat(path)
    print("EEG signals are being feature extracted...")
    EEG_tmp=np.zeros((23,18,42))
    for k in range(0,23):
        for j in range(0,18):
            for i in range(0,14):
                B,S=[],[]
                basl=data['DREAMER'][0,0]['Data'][0,k]['EEG'][0,0]['baseline'][0,0][j,0][-1-N_EEG:-1,i]
                #stim=data['DREAMER'][0,0]['Data'][0,k]['EEG'][0,0]['stimuli'][0,0][j,0][-1-N_EEG:-1,i]
                B=preprocessing(basl,B)
                #S=preprocessing(stim,S)
                Extrod=B#np.divide(S,B)
                total+=1
                EEG_tmp[k,j,3*i]=Extrod[0]
                EEG_tmp[k,j,3*i+1]=Extrod[1]
                EEG_tmp[k,j,3*i+2]=Extrod[2]
                print("\rprogress: %d%%" %(total/(23*18*14)*100),end="")
    col=[]
    for i in range(0,14):
        col.append('psdtheta_'+str(i + 1)+'_un')
        col.append('psdalpha_'+str(i + 1)+'_un')
        col.append('psdbeta_'+str(i + 1)+'_un')
    EEG=pd.DataFrame(EEG_tmp.reshape((23 * 18,EEG_tmp.shape[2])),columns=col)
    scaler=pre.StandardScaler()
    for i in range(len(col)):
        EEG[col[i][:-3]]=scaler.fit_transform(EEG[[col[i]]])
    EEG.drop(col,axis=1,inplace=True)
    print(EEG)
    EEG.to_csv('../Data/Extracted_EEG_last4s.csv')
EEG signals are being feature extracted...
progress: 100%     psdtheta_1  psdalpha_1  psdbeta_1  psdtheta_2  psdalpha_2  psdbeta_2  \
0     -0.181097   -0.183079  -0.162763   -0.164206   -0.166005  -0.165722   
1     -0.180781   -0.182764  -0.162617   -0.163658   -0.165460  -0.165768   
2     -0.181023   -0.183005  -0.162728   -0.163551   -0.165352  -0.165732   
3     -0.180947   -0.182930  -0.162694   -0.163492   -0.165294  -0.165701   
4     -0.181134   -0.183115  -0.162783   -0.163567   -0.165369  -0.165722   
..          ...         ...        ...         ...         ...        ...   
409   -0.182190   -0.184112  -0.162428   -0.163380   -0.165049  -0.163543   
410   -0.182097   -0.183973  -0.160927   -0.164377   -0.166153  -0.165682   
411   -0.182501   -0.184462  -0.163008   -0.164449   -0.166231  -0.165901   
412   -0.182431   -0.184370  -0.162583   -0.164322   -0.166097  -0.164981   
413   -0.182469   -0.184429  -0.163107   -0.164322   -0.166088  -0.165463   

     psdtheta_3  psdalpha_3  psdbeta_3  psdtheta_4  ...  psdbeta_11  \
0     -0.151888   -0.151310  -0.101514   -0.107896  ...   -0.166487   
1     -0.155448   -0.155951  -0.125252   -0.143867  ...   -0.161310   
2     -0.156683   -0.157927  -0.143867   -0.143194  ...   -0.176795   
3     -0.155842   -0.156396  -0.127366   -0.139328  ...   -0.158520   
4     -0.154435   -0.154809  -0.119832   -0.144510  ...   -0.146950   
..          ...         ...        ...         ...  ...         ...   
409   -0.161058   -0.162284  -0.155360   -0.169690  ...   -0.188054   
410   -0.160987   -0.162206  -0.155445   -0.169632  ...   -0.187863   
411   -0.161062   -0.162289  -0.155580   -0.169641  ...   -0.187487   
412   -0.160918   -0.162108  -0.154372   -0.169492  ...   -0.187735   
413   -0.161002   -0.162224  -0.155437   -0.169157  ...   -0.187964   

     psdtheta_12  psdalpha_12  psdbeta_12  psdtheta_13  psdalpha_13  \
0      -0.181325    -0.183209   -0.147505    -0.146856    -0.150206   
1      -0.182550    -0.184799   -0.153178    -0.216407    -0.219037   
2      -0.183670    -0.186054   -0.157299    -0.215280    -0.217918   
3      -0.182260    -0.184475   -0.152115    -0.180175    -0.183190   
4      -0.181521    -0.183647   -0.149395    -0.179170    -0.182199   
..           ...          ...         ...          ...          ...   
409    -0.138989    -0.133005    0.023079    -0.224227    -0.226761   
410     0.000957     0.032910    0.586399    -0.224181    -0.226711   
411    -0.060833    -0.040251    0.338782    -0.224188    -0.226708   
412    -0.153602    -0.150332   -0.035887    -0.224039    -0.226552   
413    -0.181879    -0.184116   -0.156114    -0.224014    -0.226527   

     psdbeta_13  psdtheta_14  psdalpha_14  psdbeta_14  
0     -0.144895    -0.091804    -0.096700   -0.109874  
1     -0.161700    -0.190384    -0.193340   -0.138654  
2     -0.175408    -0.192917    -0.195818   -0.150653  
3     -0.159505    -0.146952    -0.150778   -0.137000  
4     -0.149346    -0.129895    -0.134071   -0.127716  
..          ...          ...          ...         ...  
409   -0.185225    -0.203977    -0.206605   -0.158525  
410   -0.185190    -0.204153    -0.206812   -0.159055  
411   -0.185018    -0.204036    -0.206674   -0.158700  
412   -0.184569    -0.204071    -0.206715   -0.158808  
413   -0.185161    -0.203988    -0.206641   -0.159123  

[414 rows x 42 columns]
In [13]:
# Ratio between the EEG features and the corresponding baselines
All_Features = pd.read_csv("../Data/DREAMER_Preprocessed_NotTransformed_NotThresholded.csv")
del All_Features['Unnamed: 0']
Last4s_EEG_Features = pd.read_csv("../Data/Extracted_EEG_last4s.csv")
del Last4s_EEG_Features['Unnamed: 0']

Features_Ratio = All_Features[Last4s_EEG_Features.columns].div(Last4s_EEG_Features)
Features_Ratio.head(100)

Features_Ratio = Features_Ratio.join(All_Features['Valence'])
Features_Ratio = Features_Ratio.join(All_Features['Arousal'])

corrMatrix = Features_Ratio.corr()
heatmap_corrMatrix = px.imshow(corrMatrix,
                               title='Cross-correlation matrix (Ratio of whole data over last 4s)',
                               
                y=Features_Ratio.columns,
                x=Features_Ratio.columns#[str('f'+str(j+1)) for j in range(0,Nf-4)]
                )

heatmap_corrMatrix.show()
In [14]:
Correlation_Threshold_Valence = 0.04
Correlation_Threshold_Arousal = 0.04
Mask_Valence=corrMatrix > Correlation_Threshold_Valence
Mask_Arousal=corrMatrix > Correlation_Threshold_Arousal
Relevant_Features_List_Valence = Features_Ratio.columns[Mask_Valence.iloc[-2,:]]
Relevant_Features_List_Valence=Relevant_Features_List_Valence[:-1] # Delete the last element which has cross-correlation of 1
Relevant_Features_List_Arousal = Features_Ratio.columns[Mask_Arousal.iloc[-1,:]]
Relevant_Features_List_Arousal=Relevant_Features_List_Arousal[:-1] # Delete the last element which has cross-correlation of 1
print(Relevant_Features_List_Valence)
print(Relevant_Features_List_Arousal)
# Quite deceiving : Very low correlations
print(Relevant_Features_List_Valence.dtype)
Index(['psdbeta_1', 'psdtheta_2', 'psdalpha_2', 'psdbeta_2', 'psdbeta_5'], dtype='object')
Index(['psdalpha_12', 'psdbeta_12'], dtype='object')
object
In [15]:
# 1. MinMax the data

All_Features = pd.read_csv("../Data/DREAMER_Preprocessed_NotTransformed_NotThresholded.csv")
del All_Features['Unnamed: 0']
Last4s_EEG_Features = pd.read_csv("../Data/Extracted_EEG_last4s.csv")
del Last4s_EEG_Features['Unnamed: 0']

for column in All_Features.columns:
    if not(All_Features[column].dtype == np.object):
        All_Features[column]=(All_Features[column]-np.min(All_Features[column]))/(np.max(All_Features[column])-np.min(All_Features[column]))
for column in Last4s_EEG_Features.columns:
    if not(Last4s_EEG_Features[column].dtype == np.object):
        Last4s_EEG_Features[column]=(Last4s_EEG_Features[column]-np.min(Last4s_EEG_Features[column]))/(np.max(Last4s_EEG_Features[column])-np.min(Last4s_EEG_Features[column]))

# 2. Ratio between the EEG features and the corresponding baselines

Features_Ratio = All_Features[Last4s_EEG_Features.columns].div(Last4s_EEG_Features)
Features_Ratio.head(100)

# Heatmap of the cross-correlations with Valence and Arousal without thresholding

Features_Ratio = Features_Ratio.join(All_Features['Valence'])
Features_Ratio = Features_Ratio.join(All_Features['Arousal'])

corrMatrix = Features_Ratio.corr()
heatmap_corrMatrix = px.imshow(corrMatrix,
                               title='(min-max) Cross-correlation matrix (Ratio of the whole data over the last 4s)',
                               
                y=Features_Ratio.columns,
                x=Features_Ratio.columns#[str('f'+str(j+1)) for j in range(0,Nf-4)]
                )

heatmap_corrMatrix.show()

# 3. Threshold Valence and Arousal

All_Features['Valence'].loc[All_Features['Valence']<0.5]=0
All_Features['Valence'].loc[All_Features['Valence']>=0.5]=1
All_Features['Arousal'].loc[All_Features['Arousal']<0.5]=0
All_Features['Arousal'].loc[All_Features['Arousal']>=0.5]=1

Features_Ratio = All_Features[Last4s_EEG_Features.columns].div(Last4s_EEG_Features)
Features_Ratio = Features_Ratio.join(All_Features['Valence'])
Features_Ratio = Features_Ratio.join(All_Features['Arousal'])

# Heatmap of the cross-correlations with Valence and Arousal without thresholding

print(np.shape(Features_Ratio))
corrMatrix = Features_Ratio.corr()
heatmap_corrMatrix = px.imshow(corrMatrix,
                               title='(min-max)(Valence and Arousal Thresholded) Cross-correlation matrix (Ratio of the whole data over the last 4s)',
                               
                y=Features_Ratio.columns,
                x=Features_Ratio.columns#[str('f'+str(j+1)) for j in range(0,Nf-4)]
                )

heatmap_corrMatrix.show()
(414, 44)
/home/akabbabi/miniconda3/lib/python3.7/site-packages/pandas/core/indexing.py:671: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [16]:
# 1. zscore the data

All_Features = pd.read_csv("../Data/All_Features_with_Age_Gender.csv")
del All_Features['Unnamed: 0']
Last4s_EEG_Features = pd.read_csv("../Data/Extracted_EEG_last4s.csv")
del Last4s_EEG_Features['Unnamed: 0']

for column in All_Features.columns:
    if not(All_Features[column].dtype == np.object):
        All_Features[column]=sp.stats.zscore(All_Features[column])
for column in Last4s_EEG_Features.columns:
    if not(Last4s_EEG_Features[column].dtype == np.object):
        Last4s_EEG_Features[column]=sp.stats.zscore(Last4s_EEG_Features[column])

# 2. Ratio between the EEG features and the corresponding baselines

Features_Ratio = All_Features[Last4s_EEG_Features.columns].div(Last4s_EEG_Features)
Features_Ratio.head(100)

# Heatmap of the cross-correlations with Valence and Arousal without thresholding

Features_Ratio = Features_Ratio.join(All_Features['Valence'])
Features_Ratio = Features_Ratio.join(All_Features['Arousal'])

corrMatrix = Features_Ratio.corr()
heatmap_corrMatrix = px.imshow(corrMatrix,
                               title='(z-scored) Cross-correlation matrix (Ratio of the whole data over the last 4s)',
                               
                y=Features_Ratio.columns,
                x=Features_Ratio.columns
                )

heatmap_corrMatrix.show()

# 3. Threshold Valence and Arousal

All_Features['Valence'].loc[All_Features['Valence']<0.5]=0
All_Features['Valence'].loc[All_Features['Valence']>=0.5]=1
All_Features['Arousal'].loc[All_Features['Arousal']<0.5]=0
All_Features['Arousal'].loc[All_Features['Arousal']>=0.5]=1

Features_Ratio = All_Features[Last4s_EEG_Features.columns].div(Last4s_EEG_Features)
Features_Ratio = Features_Ratio.join(All_Features['Valence'])
Features_Ratio = Features_Ratio.join(All_Features['Arousal'])

# Heatmap of the cross-correlations with Valence and Arousal without thresholding

#Features_Ratio=sp.stats.zscore(Features_Ratio)
print(np.shape(Features_Ratio))
corrMatrix = Features_Ratio.corr()
heatmap_corrMatrix = px.imshow(corrMatrix,
                               title='(z-scored)(Valence and Arousal Thresholded) Cross-correlation matrix (Ratio of the whole data over the last 4s)',
                               
                y=Features_Ratio.columns,
                x=Features_Ratio.columns#[str('f'+str(j+1)) for j in range(0,Nf-4)]
                )

heatmap_corrMatrix.show()
(414, 44)